require(plm)
require(runjags)
require(coda)

#### data preparation ####
LDP.data.Study2 <- read.csv("LDP_data_Study2.csv")

# exclude periods with no speech
# PS = plenary session, BC = Budget Committee, OC = other committees
LDP.personnel.period.date.Study2 <- as.Date(c("2003-11-19", "2004-09-27", "2005-09-21", 
                                              "2005-10-31", "2006-09-26", "2007-08-27", 
                                              "2007-09-26", "2008-08-02", "2008-09-24", 
                                              "2009-09-16", "2010-09-09", "2011-09-30", 
                                              "2012-10-04", "2012-12-26", "2014-09-03", 
                                              "2014-12-24", "2015-10-07", "2016-08-03", 
                                              "2017-08-03", "2017-11-01"))

LDP.PS.data.Study2 <- LDP.BC.data.Study2 <- LDP.OC.data.Study2 <- LDP.data.Study2
for (i in 1:(length(LDP.personnel.period.date.Study2) - 1)) {
  temp.data <- subset(LDP.data.Study2, time == i)
  if (sum(temp.data$PS.char > 0) == 0) {
    LDP.PS.data.Study2 <- subset(LDP.PS.data.Study2, time != i)
  }
  if (sum(temp.data$BC.char > 0) == 0) {
    LDP.BC.data.Study2 <- subset(LDP.BC.data.Study2, time != i)
  }
  if (sum(temp.data$OC.char > 0) == 0) {
    LDP.OC.data.Study2 <- subset(LDP.OC.data.Study2, time != i)
  }
}

# discard MPs whose ideal points are missing
LDP.PS.data.Study2 <- subset(LDP.PS.data.Study2, ! is.na(distance.1))
LDP.BC.data.Study2 <- subset(LDP.BC.data.Study2, ! is.na(distance.1))
LDP.OC.data.Study2 <- subset(LDP.OC.data.Study2, ! is.na(distance.1))

# standardize ideological distance variables
LDP.PS.data.Study2$distance.1 <- scale(LDP.PS.data.Study2$distance.1)
LDP.PS.data.Study2$distance.2 <- scale(LDP.PS.data.Study2$distance.2)
LDP.BC.data.Study2$distance.1 <- scale(LDP.BC.data.Study2$distance.1)
LDP.BC.data.Study2$distance.2 <- scale(LDP.BC.data.Study2$distance.2)
LDP.OC.data.Study2$distance.1 <- scale(LDP.OC.data.Study2$distance.1)
LDP.OC.data.Study2$distance.2 <- scale(LDP.OC.data.Study2$distance.2)
LDP.PS.data.Study2$distance.1.w.P <- scale(LDP.PS.data.Study2$distance.1.w.P)
LDP.PS.data.Study2$distance.2.w.P <- scale(LDP.PS.data.Study2$distance.2.w.P)
LDP.BC.data.Study2$distance.1.w.P <- scale(LDP.BC.data.Study2$distance.1.w.P)
LDP.BC.data.Study2$distance.2.w.P <- scale(LDP.BC.data.Study2$distance.2.w.P)
LDP.OC.data.Study2$distance.1.w.P <- scale(LDP.OC.data.Study2$distance.1.w.P)
LDP.OC.data.Study2$distance.2.w.P <- scale(LDP.OC.data.Study2$distance.2.w.P)

# convert data frames to the pdata.frame class of the plm package
LDP.PS.pdata.Study2 <- pdata.frame(LDP.PS.data.Study2, c("name_jp", "time"))
LDP.BC.pdata.Study2 <- pdata.frame(LDP.BC.data.Study2, c("name_jp", "time"))
LDP.OC.pdata.Study2 <- pdata.frame(LDP.OC.data.Study2, c("name_jp", "time"))

#### main analyses (Table 3) ####
Study2.LDP.PS <- plm(PS.char ~ distance.1 + distance.2 + 
                       top.leader + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = LDP.PS.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.LDP.PS, 
              vcov = vcovHC(Study2.LDP.PS, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)
nrow(LDP.PS.pdata.Study2)  # number of observations
length(unique(LDP.PS.pdata.Study2$name_jp))  # number of MPs
length(unique(LDP.PS.pdata.Study2$time))  # number of periods

round(exp(Study2.LDP.PS$coefficients[2]), 3)  # substantive interpretation

#### analysis of committees (Online Appendix C) ####
## Budget Committee
Study2.LDP.BC <- plm(BC.char ~ distance.1 + distance.2 + 
                       top.leader + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = LDP.BC.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.LDP.BC, 
              vcov = vcovHC(Study2.LDP.BC, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)
nrow(LDP.BC.pdata.Study2)  # number of observations
length(unique(LDP.BC.pdata.Study2$name_jp))  # number of MPs
length(unique(LDP.BC.pdata.Study2$time))  # number of periods

## other committees
Study2.LDP.OC <- plm(OC.char ~ distance.1 + distance.2 + 
                       top.leader + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = LDP.OC.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.LDP.OC, 
              vcov = vcovHC(Study2.LDP.OC, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)
nrow(LDP.OC.pdata.Study2)  # number of observations
length(unique(LDP.OC.pdata.Study2$name_jp))  # number of MPs
length(unique(LDP.OC.pdata.Study2$time))  # number of periods

round(exp(Study2.LDP.OC$coefficients[3]), 3)  # substantive interpretation

#### robustness check: other dependent variables (Online Appendix D.1) ####
## number of speeches
Study2.LDP.PS.count <- plm(PS.count ~ distance.1 + distance.2 + 
                             top.leader + chair + minister + jr.minister + 
                             female + age + dynastic + log.term + 
                             vote.share * SMD + zombie + 
                             E.44 + E.45 + E.46 + E.47, 
                           data = LDP.PS.pdata.Study2, 
                           model = "pooling")
round(summary(Study2.LDP.PS.count, 
              vcov = vcovHC(Study2.LDP.PS.count, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.LDP.BC.count <- plm(BC.count ~ distance.1 + distance.2 + 
                             top.leader + chair + minister + jr.minister + 
                             female + age + dynastic + log.term + 
                             vote.share * SMD + zombie + 
                             E.44 + E.45 + E.46 + E.47, 
                           data = LDP.BC.pdata.Study2, 
                           model = "pooling")
round(summary(Study2.LDP.BC.count, 
              vcov = vcovHC(Study2.LDP.BC.count, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.LDP.OC.count <- plm(OC.count ~ distance.1 + distance.2 + 
                             top.leader + chair + minister + jr.minister + 
                             female + age + dynastic + log.term + 
                             vote.share * SMD + zombie + 
                             E.44 + E.45 + E.46 + E.47, 
                           data = LDP.OC.pdata.Study2, 
                           model = "pooling")
round(summary(Study2.LDP.OC.count, 
              vcov = vcovHC(Study2.LDP.OC.count, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

## making one or more speeches
Study2.LDP.PS.bin <- plm(PS.bin ~ distance.1 + distance.2 + 
                           top.leader + chair + minister + jr.minister + 
                           female + age + dynastic + log.term + 
                           vote.share * SMD + zombie + 
                           E.44 + E.45 + E.46 + E.47, 
                         data = LDP.PS.pdata.Study2, 
                         model = "pooling")
round(summary(Study2.LDP.PS.bin, 
              vcov = vcovHC(Study2.LDP.PS.bin, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.LDP.BC.bin <- plm(BC.bin ~ distance.1 + distance.2 + 
                           top.leader + chair + minister + jr.minister + 
                           female + age + dynastic + log.term + 
                           vote.share * SMD + zombie + 
                           E.44 + E.45 + E.46 + E.47, 
                         data = LDP.BC.pdata.Study2, 
                         model = "pooling")
round(summary(Study2.LDP.BC.bin, 
              vcov = vcovHC(Study2.LDP.BC.bin, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.LDP.OC.bin <- plm(OC.bin ~ distance.1 + distance.2 + 
                           top.leader + chair + minister + jr.minister + 
                           female + age + dynastic + log.term + 
                           vote.share * SMD + zombie + 
                           E.44 + E.45 + E.46 + E.47, 
                         data = LDP.OC.pdata.Study2, 
                         model = "pooling")
round(summary(Study2.LDP.OC.bin, 
              vcov = vcovHC(Study2.LDP.OC.bin, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

#### robustness check: censored regression (Online Appendix D.2) ####
# function to draw initial values
initial.fun.Tobit <- function(seed1, seed2, n.var, n.id) {
  set.seed(seed1)
  alpha <- rnorm(1)
  beta <- rnorm(n.var)
  sigma <- runif(1, 1, 2)
  list(alpha = alpha, beta = beta, sigma = sigma, 
       .RNG.name = "base::Wichmann-Hill", .RNG.seed = seed2)
}
# draw initial values
Study2.LDP.PS.Tobit.inits <- list()
for (i in 1:3) {
  Study2.LDP.PS.Tobit.inits[[i]] <- 
    initial.fun.Tobit(100 * i, 1000 * i, 18, 
                      max(as.numeric(factor(LDP.PS.data.Study2$name_jp))))
}
# MCMC sampling
Study2.LDP.PS.Tobit <- run.jags(model = "Tobit_JAGS.R", 
                                monitor = c("beta", "alpha", "sigma"), 
                                data = list(y = ifelse(LDP.PS.data.Study2$PS.char > 0, LDP.PS.data.Study2$PS.char, NA), 
                                            z = (LDP.PS.data.Study2$PS.char > 0) * 1, 
                                            x = scale(as.matrix(LDP.PS.data.Study2[, c("distance.1", "distance.2", "top.leader", 
                                                                                       "chair", "minister", "jr.minister", 
                                                                                       "female", "age", "dynastic", "log.term", 
                                                                                       "vote.share", "SMD", "SMD.VS", "zombie", 
                                                                                       "E.44", "E.45", "E.46", "E.47")]), 
                                                      scale = FALSE), 
                                            N = nrow(LDP.PS.data.Study2), 
                                            n.var = 18), 
                                inits = Study2.LDP.PS.Tobit.inits, 
                                n.chains = 3, burnin = 5000, sample = 5000, modules = "glm", 
                                summarise = FALSE, thin = 1, method = "parallel")
# convergence check
print(which(gelman.diag(Study2.LDP.PS.Tobit, multivariate = FALSE)[[1]][, 1] > 1.1))
# posterior summary
round(summary(Study2.LDP.PS.Tobit), 3)
# posterior probability that the coefficients of left-right distance (beta[1])
# and economic distance (beta[2]) are negative
round(mean(unlist(Study2.LDP.PS.Tobit$mcmc[, 1]) < 0), 3)
round(mean(unlist(Study2.LDP.PS.Tobit$mcmc[, 2]) < 0), 3)

Study2.LDP.BC.Tobit.inits <- list()
for (i in 1:3) {
  Study2.LDP.BC.Tobit.inits[[i]] <- 
    initial.fun.Tobit(100 * i, 1000 * i, 18, 
                      max(as.numeric(factor(LDP.BC.data.Study2$name_jp))))
}
Study2.LDP.BC.Tobit <- run.jags(model = "Tobit_JAGS.R", 
                                monitor = c("beta", "alpha", "sigma"), 
                                data = list(y = ifelse(LDP.BC.data.Study2$BC.char > 0, LDP.BC.data.Study2$BC.char, NA), 
                                            z = (LDP.BC.data.Study2$BC.char > 0) * 1, 
                                            x = scale(as.matrix(LDP.BC.data.Study2[, c("distance.1", "distance.2", "top.leader", 
                                                                                       "chair", "minister", "jr.minister", 
                                                                                       "female", "age", "dynastic", "log.term", 
                                                                                       "vote.share", "SMD", "SMD.VS", "zombie", 
                                                                                       "E.44", "E.45", "E.46", "E.47")]), 
                                                      scale = FALSE), 
                                            N = nrow(LDP.BC.data.Study2), 
                                            n.var = 18), 
                                inits = Study2.LDP.BC.Tobit.inits, 
                                n.chains = 3, burnin = 5000, sample = 5000, modules = "glm", 
                                summarise = FALSE, thin = 1, method = "parallel")
print(which(gelman.diag(Study2.LDP.BC.Tobit, multivariate = FALSE)[[1]][, 1] > 1.1))
round(summary(Study2.LDP.BC.Tobit), 3)
round(mean(unlist(Study2.LDP.BC.Tobit$mcmc[, 1]) < 0), 3)
round(mean(unlist(Study2.LDP.BC.Tobit$mcmc[, 2]) < 0), 3)

Study2.LDP.OC.Tobit.inits <- list()
for (i in 1:3) {
  Study2.LDP.OC.Tobit.inits[[i]] <- 
    initial.fun.Tobit(100 * i, 1000 * i, 18, 
                      max(as.numeric(factor(LDP.OC.data.Study2$name_jp))))
}
Study2.LDP.OC.Tobit <- run.jags(model = "Tobit_JAGS.R", 
                                monitor = c("beta", "alpha", "sigma"), 
                                data = list(y = ifelse(LDP.OC.data.Study2$OC.char > 0, LDP.OC.data.Study2$OC.char, NA), 
                                            z = (LDP.OC.data.Study2$OC.char > 0) * 1, 
                                            x = scale(as.matrix(LDP.OC.data.Study2[, c("distance.1", "distance.2", "top.leader", 
                                                                                       "chair", "minister", "jr.minister", 
                                                                                       "female", "age", "dynastic", "log.term", 
                                                                                       "vote.share", "SMD", "SMD.VS", "zombie", 
                                                                                       "E.44", "E.45", "E.46", "E.47")]), 
                                                      scale = FALSE), 
                                            N = nrow(LDP.OC.data.Study2), 
                                            n.var = 18), 
                                inits = Study2.LDP.OC.Tobit.inits, 
                                n.chains = 3, burnin = 5000, sample = 5000, modules = "glm", 
                                summarise = FALSE, thin = 1, method = "parallel")
print(which(gelman.diag(Study2.LDP.OC.Tobit, multivariate = FALSE)[[1]][, 1] > 1.1))
round(summary(Study2.LDP.OC.Tobit), 3)
round(mean(unlist(Study2.LDP.OC.Tobit$mcmc[, 1]) < 0), 3)
round(mean(unlist(Study2.LDP.OC.Tobit$mcmc[, 2]) < 0), 3)

#### robustness check: another definition of top leaders (Online Appendix D.3) ####
Study2.PS.w.P <- plm(PS.char ~ distance.1.w.P + distance.2.w.P + 
                       top.leader.w.P + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = LDP.PS.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.PS.w.P, 
              vcov = vcovHC(Study2.PS.w.P, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.BC.w.P <- plm(BC.char ~ distance.1.w.P + distance.2.w.P + 
                       top.leader.w.P + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = LDP.BC.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.BC.w.P, 
              vcov = vcovHC(Study2.BC.w.P, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)

Study2.OC.w.P <- plm(OC.char ~ distance.1.w.P + distance.2.w.P + 
                       top.leader.w.P + chair + minister + jr.minister + 
                       female + age + dynastic + log.term + 
                       vote.share * SMD + zombie + 
                       E.44 + E.45 + E.46 + E.47, 
                     data = LDP.OC.pdata.Study2, 
                     model = "pooling")
round(summary(Study2.OC.w.P, 
              vcov = vcovHC(Study2.OC.w.P, 
                            method = "arellano", 
                            type = "HC2"))$coefficients, 3)